### Linux command
abdir="/rds/general/user/la420/home/CUTnTAG/antibodies"
corrdir="${abdir}/correlation"
bamdir="${abdir}/bam/sorted_bam"
beddir="${corrdir}/ENCODE_peak_files"
ENCODE_bamdir="${corrdir}/ENCODE_peak_files/bam/sorted_bam/fixed"
old_ENCODE_bamdir="${corrdir}/ENCODE_peak_files/bam/sorted_bam"
outdir="${corrdir}/all_marks"
#mergedbeddir="${abdir}/all_peaks/merged"
mark="H3K27ac_ENCFF044JNJ"
#mark="H3K27me3_ENCFF126QYP"
bedtools multicov -bams \
$bamdir/Abcam-ab177178_rmDup.sorted.fixed2.bam \
$bamdir/Abcam-ab4729_rmDup.sorted.fixed2.bam \
$bamdir/ActiveMotif_rmDup.sorted.fixed2.bam \
$bamdir/Diagenode_100x_rmDup.sorted.fixed2.bam \
$bamdir/Diagenode_50x_rmDup.sorted.fixed2.bam \
$bamdir/H3K27me3_rmDup.sorted.fixed2.bam \
$old_ENCODE_bamdir/H3K27ac_ENCFF384ZZM_sorted.bam \
$old_ENCODE_bamdir/H3K27me3_ENCFF676ORH_sorted.bam \
$ENCODE_bamdir/H3K4me1_ENCFF196QGZ_sorted.fixed.bam \
$ENCODE_bamdir/H3K4me3_ENCFF915MJO_sorted.fixed.bam \
$ENCODE_bamdir/H3K9ac_ENCFF204DNG_sorted.fixed.bam \
$ENCODE_bamdir/H3K9me3_ENCFF805FLY_sorted.fixed.bam \
$ENCODE_bamdir/H3K36me3_ENCFF190AQC_sorted.fixed.bam \
$ENCODE_bamdir/DNase_ENCFF826DJP_sorted.fixed.bam \
-bed $beddir/${mark}.bed.narrowPeak > $outdir/${mark}_overlaps.txtcountsList_copy = countsList
names = c("Abcam-ab177178", "Abcam-ab4729", "Active Motif", "Diagenode (1:100)", "Diagenode (1:50)", "H3K27me3", "ENCODE_H3K27ac", "ENCODE_H3K27me3", "ENCODE_H3K4me1", "ENCODE_H3K4me3", "ENCODE_H3K9ac", "ENCODE_H3K9me3", "ENCODE_H3K36me3", "ENCODE_DNase")
for(i in 1:length(countsList_copy)){
countsList_copy[[i]] = countsList_copy[[i]][,11:24] %>% as.matrix() %>% preprocessCore::normalize.quantiles() %>% round() %>% cor(method = "pearson")
colnames(countsList_copy[[i]]) = names
rownames(countsList_copy[[i]]) = names
}H3K27ac: Enhancers, promoters; permissive
H3K27me3: Promoters, gene-rich regions; repressing
H3K9me3: Satellite repeats, telomeres, pericentromeres; repressing
H3K9ac: Enhancers, promoters; permissive
H3K4me1: Enhancers; permissive
H3K4me3: Promoters; permissive
H3K36me3: Transcribed regions; permissive
DNase: Open chromatin regions
datadir = "/rds/general/user/la420/home/CUTnTAG/antibodies/correlation/all_marks/"
counts = read.csv(paste0(datadir, "hg19_500bp_overlaps_old.txt"), sep = "\t", header = FALSE)counts_copy = counts
counts_copy = counts_copy[,4:17] %>% as.matrix() %>% preprocessCore::normalize.quantiles() %>% round() %>% cor(method = "pearson")
colnames(counts_copy) = names
rownames(counts_copy) = namescorrplot(counts_copy, method = "color", col = col, type = "upper", order = "original", tl.col = "black", tl.srt = 50, tl.cex = 0.5, cl.lim = c(0,1), addCoef.col = "black", number.cex = 0.5)counts = counts_copy %>% round(3)
heatmap.2(x = counts, col = col, trace = "none", density.info = "none", cexRow = 1, cexCol = 1, cellnote = counts, notecex = 1, notecol = "black", lhei = c(1, 10), lwid = c(1, 7), margins = c(15, 15))corrplot(countsList_copy[[1]], method = "color", col = col2, type = "upper", order = "original", tl.col = "black", tl.srt = 50, tl.cex = 0.5, cl.lim = c(0,1), addCoef.col = "black", number.cex = 0.5)counts = countsList_copy[[1]] %>% round(3)
heatmap.2(x = counts, col = col, trace = "none", density.info = "none", cexRow = 1, cexCol = 1, cellnote = counts, notecex = 1, notecol = "black", lhei = c(1, 10), lwid = c(1, 7), margins = c(15, 15))corrplot(countsList_copy[[2]], method = "color", col = col2, type = "upper", order = "original", tl.col = "black", tl.srt = 50, tl.cex = 0.5, cl.lim = c(0,1), addCoef.col = "black", number.cex = 0.5)counts = countsList_copy[[2]] %>% round(3)
heatmap.2(x = counts, col = col, trace = "none", density.info = "none", cexRow = 1, cexCol = 1, cellnote = counts, notecex = 1, notecol = "black", lhei = c(1, 10), lwid = c(1, 7), margins = c(15, 15))datadir = "/rds/general/user/la420/home/CUTnTAG/antibodies/correlation/all_marks/"
markList = c("SEACR_noDup", "MACS2_noDup")
countsList = list()
for(i in 1:length(markList)){
countsList[[i]] = read.csv(paste0(datadir, markList[i], "_overlaps.txt"), sep = "\t", header = FALSE)
}countsList_copy = countsList
for(i in 1:length(countsList_copy)){
countsList_copy[[i]] = countsList_copy[[i]][,4:17] %>% as.matrix() %>% preprocessCore::normalize.quantiles() %>% round() %>% cor(method = "pearson")
colnames(countsList_copy[[i]]) = names
rownames(countsList_copy[[i]]) = names
}ATAC-seq libraries: ENCLB918NXF, ENCLB758GEG from https://www.encodeproject.org/experiments/ENCSR483RKN/
peakdir = "/rds/general/user/la420/home/CUTnTAG/ATACseq/nfcore/results/bwa/mergedReplicate/macs/narrowPeak"
ATAC_path = file.path(peakdir, "K562_ATACseq.mRp.clN_peaks.narrowPeak")
ATAC = ChIPseeker::readPeakFile(ATAC_path, as = "GRanges") %>% BRGenomics::tidyChromosomes(keep.X = TRUE, keep.Y = TRUE)
extraCols_narrowPeak = c(signalValue = "numeric", pValue = "numeric", qValue = "numeric", peak = "integer")
ENCODE_H3K27ac = rtracklayer::import("https://www.encodeproject.org/files/ENCFF044JNJ/@@download/ENCFF044JNJ.bed.gz", format = "BED", extraCols = extraCols_narrowPeak)overlaps_ATAC_ENCODE = GenomicRanges::countOverlaps(query = ATAC, subject = ENCODE_H3K27ac)
overlaps_ENCODE_ATAC = GenomicRanges::countOverlaps(query = ENCODE_H3K27ac, subject = ATAC)
length(overlaps_ATAC_ENCODE[overlaps_ATAC_ENCODE != 0])## [1] 26673
## [1] 35510
## [1] 85342
## [1] 51176
datadir = "/rds/general/user/la420/home/CUTnTAG/antibodies"
sampleList = c("ActiveMotif", "Diagenode_100x", "Diagenode_50x", "Abcam-ab177178", "Abcam-ab4729", "H3K27me3")
Total_ENCODE_inPeakData = c()
peakRes = data.frame(ENCODE_H3K27ac)
for(sample in sampleList){
peak.gr = GenomicRanges::GRanges(seqnames = peakRes$seqnames, IRanges(start = peakRes$start, end = peakRes$end), strand = "*")
bamFile = paste0(datadir, "/bam/", sample, "_bowtie2_rmDup.mapped.bam")
fragment_counts = chromVAR::getCounts(bamFile, peak.gr, paired = TRUE, by_rg = FALSE, format = "bam")
Total_ENCODE_inPeakN = counts(fragment_counts)[,1] %>% sum
Total_ENCODE_inPeakData = rbind(Total_ENCODE_inPeakData, data.frame(Antibody = sample, Total_ENCODE_inPeakN = Total_ENCODE_inPeakN))
}
Total_ENCODE_inPeakDatadatadir_ENCODE = "/rds/general/user/la420/home/CUTnTAG/antibodies"
ENCODE_sampleList = c("H3K27ac_ENCFF384ZZM", "H3K27me3_ENCFF676ORH")
Total_ENCODE_inPeakData = c()
peakRes = data.frame(ENCODE_H3K27ac)
for(sample in sampleList){
peak.gr = GenomicRanges::GRanges(seqnames = peakRes$seqnames, IRanges(start = peakRes$start, end = peakRes$end), strand = "*")
bamFile = paste0(datadir, "/bam/", sample, "_bowtie2_rmDup.mapped.bam")
fragment_counts = chromVAR::getCounts(bamFile, peak.gr, paired = FALSE, by_rg = FALSE, format = "bam")
Total_ENCODE_inPeakN = counts(fragment_counts)[,1] %>% sum
Total_ENCODE_inPeakData = rbind(Total_ENCODE_inPeakData, data.frame(Antibody = sample, Total_ENCODE_inPeakN = Total_ENCODE_inPeakN))
}
Total_ENCODE_inPeakDatadatadir = "/rds/general/user/la420/home/CUTnTAG/antibodies"
sampleList = c("ActiveMotif", "Diagenode_100x", "Diagenode_50x", "Abcam-ab177178", "Abcam-ab4729", "H3K27me3")
sampleList = c("H3K27me3")
Total_ATAC_inPeakData = c()
peakRes = data.frame(ATAC)
for(sample in sampleList){
peak.gr = GenomicRanges::GRanges(seqnames = peakRes$seqnames, IRanges(start = peakRes$start, end = peakRes$end), strand = "*")
bamFile = paste0(datadir, "/bam/", sample, "_bowtie2_rmDup.mapped.bam")
fragment_counts = chromVAR::getCounts(bamFile, peak.gr, paired = TRUE, by_rg = FALSE, format = "bam")
Total_ATAC_inPeakN = counts(fragment_counts)[,1] %>% sum
Total_ATAC_inPeakData = rbind(Total_ATAC_inPeakData, data.frame(Antibody = sample, Total_ATAC_inPeakN = Total_ATAC_inPeakN))
}
Total_ATAC_inPeakDatadatadir = "/rds/general/user/la420/home/CUTnTAG/antibodies"
sampleList = c("ActiveMotif", "Diagenode_100x", "Diagenode_50x", "Abcam-ab177178", "Abcam-ab4729", "H3K27me3")
ATAC_inPeakData = c()
peakRes = data.frame(ATAC_not_ENCODE)
for(sample in sampleList){
peak.gr = GenomicRanges::GRanges(seqnames = peakRes$seqnames, IRanges(start = peakRes$start, end = peakRes$end), strand = "*")
bamFile = paste0(datadir, "/bam/", sample, "_bowtie2_rmDup.mapped.bam")
fragment_counts = chromVAR::getCounts(bamFile, peak.gr, paired = TRUE, by_rg = FALSE, format = "bam")
ATAC_inPeakN = counts(fragment_counts)[,1] %>% sum
ATAC_inPeakData = rbind(ATAC_inPeakData, data.frame(Antibody = sample, ATAC_inPeakN = ATAC_inPeakN))
}
ATAC_inPeakDatadatadir = "/rds/general/user/la420/home/CUTnTAG/antibodies"
sampleList = c("ActiveMotif", "Diagenode_100x", "Diagenode_50x", "Abcam-ab177178", "Abcam-ab4729", "H3K27me3")
ENCODE_inPeakData = c()
peakRes = data.frame(ENCODE_not_ATAC)
for(sample in sampleList){
peak.gr = GenomicRanges::GRanges(seqnames = peakRes$seqnames, IRanges(start = peakRes$start, end = peakRes$end), strand = "*")
bamFile = paste0(datadir, "/bam/", sample, "_bowtie2_rmDup.mapped.bam")
fragment_counts = chromVAR::getCounts(bamFile, peak.gr, paired = TRUE, by_rg = FALSE, format = "bam")
ENCODE_inPeakN = counts(fragment_counts)[,1] %>% sum
ENCODE_inPeakData = rbind(ENCODE_inPeakData, data.frame(Antibody = sample, ENCODE_inPeakN = ENCODE_inPeakN))
}
ENCODE_inPeakDatadatadir = "/rds/general/user/la420/home/CUTnTAG/antibodies"
ENCODE_sampleList = c("H3K27ac_ENCFF384ZZM", "H3K27me3_ENCFF676ORH")
peakRes = data.frame(ATAC_not_ENCODE)
ENCODE_ATAC_inPeakData = c()
for(sample in ENCODE_sampleList){
peak.gr = GenomicRanges::GRanges(seqnames = peakRes$seqnames, IRanges(start = peakRes$start, end = peakRes$end), strand = "*")
bamFile = paste0(datadir, "/correlation/ENCODE_peak_files/bam/", sample, ".bam")
fragment_counts = chromVAR::getCounts(bamFile, peak.gr, paired = FALSE, by_rg = FALSE, format = "bam")
ENCODE_ATAC_inPeakN = counts(fragment_counts)[,1] %>% sum
ENCODE_ATAC_inPeakData = rbind(ENCODE_ATAC_inPeakData, data.frame(Antibody = sample, ENCODE_ATAC_inPeakN = ENCODE_ATAC_inPeakN))
}
ENCODE_total_frags = c(9770603, 12064137)
ENCODE_ATAC_inPeakData$UniqueFragNum = ENCODE_total_frags
ENCODE_ATAC_inPeakData$Percentage = ENCODE_ATAC_inPeakData$ENCODE_ATAC_inPeakN / ENCODE_ATAC_inPeakData$UniqueFragNum * 100
ENCODE_ATAC_inPeakData## summarize the duplication information from the picard summary outputs
picarddir = "/rds/general/user/la420/home/CUTnTAG/antibodies/picard_summary/"
dupResult = c()
for(sample in sampleList){
dupRes = read.table(paste0(picarddir, sample, "_picard.rmDup.txt"), header = TRUE, fill = TRUE)
dupResult = data.frame(Antibody = sample, MappedFragNum_hg19 = dupRes$READ_PAIRS_EXAMINED[1] %>% as.character %>% as.numeric, DuplicationRate = dupRes$PERCENT_DUPLICATION[1] %>% as.character %>% as.numeric * 100) %>% mutate(UniqueFragNum = (MappedFragNum_hg19 * (1-DuplicationRate/100)) %>% round()) %>% rbind(dupResult, .)
}
dupResult = dupResult[c("Antibody", "UniqueFragNum")]
dupResultpeakData = dupResult %>% left_join(Total_ENCODE_inPeakData, by = "Antibody") %>% left_join(ENCODE_inPeakData, by = "Antibody") %>% left_join(Total_ATAC_inPeakData, by = "Antibody") %>% left_join(ATAC_inPeakData, by = "Antibody")
peakData$Percentage_ATAC = peakData$Total_ATAC_inPeakN / peakData$UniqueFragNum * 100
peakData$Percentage_ENCODE = peakData$Total_ENCODE_inPeakN / peakData$UniqueFragNum * 100
peakData$Percentage_ATAC_only = peakData$ENCODE_inPeakN / peakData$UniqueFragNum * 100
peakData$Percentage_ENCODE_only = peakData$ATAC_inPeakN / peakData$UniqueFragNum * 100
peakData = peakData %>% dplyr::select(Antibody, UniqueFragNum, Total_ENCODE_inPeakN, ENCODE_inPeakN, Total_ATAC_inPeakN, ATAC_inPeakN, Percentage_ENCODE, Percentage_ENCODE_only, Percentage_ATAC, Percentage_ATAC_only)
colnames(peakData) = c("Antibody", "Unique Fragments", "Fragments in ENCODE H3K27ac peaks", "Fragments in ENCODE H3K27ac peaks (exc ATAC)", "Fragments in ATAC peaks", "Fragments in ATAC peaks (exc ENCODE)", "% of fragments in ENCODE H3K27ac (total)", "% of fragments in ENCODE H3K27ac (exc. ATAC)", "% of fragments in ATAC (total)", "% of fragments in ATAC (exc. ENCODE)")
peakDataATAC_peakData = peakData[c("Antibody", "% of fragments in ENCODE H3K27ac (total)", "% of fragments in ATAC (total)", "% of fragments in ENCODE H3K27ac (exc. ATAC)", "% of fragments in ATAC (exc. ENCODE)")]
ATAC_peakData_melt = melt(ATAC_peakData, id.vars = c("Antibody"))
ENCODE_ATAC_plot = ggplot(ATAC_peakData_melt, aes(Antibody, value)) +
geom_bar(aes(fill = variable),
width = 0.7, position = position_dodge(width = 0.75), stat = "identity") +
ylim(0, 40) +
ylab("% of fragments in peaks") +
xlab("Antibody") +
ggtitle("Fragments in ENCODE H3K27ac vs ATAC peaks") +
theme_bw()
ENCODE_ATAC_plotENCODE fragments in ATAC-only peaks
ENCODE_in_ATAC_plot = ggplot(ENCODE_ATAC_inPeakData, aes(Antibody, Percentage)) +
geom_bar(aes(fill = Antibody),
width = 0.7, position = position_dodge(width = 0.75), stat = "identity") +
ylim(0, 40) +
ylab("% of fragments in peaks") +
xlab("Antibody") +
ggtitle("ENCODE fragments in ATAC-only peaks") +
theme_bw()
ENCODE_in_ATAC_plotpeakdir_IgG = "/rds/general/user/la420/home/CUTnTAG/IgG/peakCalling/all_peaks/"
peakdir_no_IgG = "/rds/general/user/la420/home/CUTnTAG/antibodies/all_peaks/"
sampleList = c("ActiveMotif_SEACR", "ActiveMotif_MACS2", "Diagenode_100x_SEACR", "Diagenode_100x_MACS2", "Diagenode_50x_SEACR", "Diagenode_50x_MACS2", "Abcam-ab177178_SEACR", "Abcam-ab177178_MACS2", "Abcam-ab4729_SEACR", "Abcam-ab4729_MACS2", "H3K27me3_SEACR", "H3K27me3_MACS2")
Total_CT_peaks = c()
for(sample in sampleList){
peakInfo_IgG = read.table(paste0(peakdir_IgG, sample, "_IgG.bed"), header = FALSE, fill = TRUE)
peakInfo_no_IgG = read.table(paste0(peakdir_no_IgG, sample, ".bed"), header = FALSE, fill = TRUE)
Total_CT_peaks = data.frame(Antibody = sample, Total_peaks_with_IgG = nrow(peakInfo_IgG), Total_peaks_without_IgG = nrow(peakInfo_no_IgG)) %>% rbind(Total_CT_peaks, .)
}
Total_CT_peaks %>% dplyr::select(Antibody, Total_peaks_with_IgG, Total_peaks_without_IgG) extraCols_narrowPeak = c(signalValue = "numeric", pValue = "numeric", qValue = "numeric", peak = "integer")
ENCODE_H3K27ac = rtracklayer::import("https://www.encodeproject.org/files/ENCFF044JNJ/@@download/ENCFF044JNJ.bed.gz", format = "BED", extraCols = extraCols_narrowPeak)
ENCODE_H3K27me3 = rtracklayer::import("https://www.encodeproject.org/files/ENCFF126QYP/@@download/ENCFF126QYP.bed.gz", format = "BED", extraCols = extraCols_narrowPeak)ENCODE_H3K27ac_overlapping_peaks_noIgG = c()
ENCODE_H3K27ac_overlapping_peaks_IgG = c()
for(i in 1:length(peak_list_IgG)){
overlaps_H3K27ac_noIgG = GenomicRanges::countOverlaps(query = peak_list_noIgG[[i]], subject = ENCODE_H3K27ac)
overlaps_H3K27ac_IgG = GenomicRanges::countOverlaps(query = peak_list_IgG[[i]], subject = ENCODE_H3K27ac)
CT_peaks_in_ENCODE_H3K27ac_noIgG = overlaps_H3K27ac_noIgG[overlaps_H3K27ac_noIgG != 0] %>% length()
CT_peaks_in_ENCODE_H3K27ac_IgG = overlaps_H3K27ac_IgG[overlaps_H3K27ac_IgG != 0] %>% length()
ENCODE_H3K27ac_overlapping_peaks_noIgG = c(ENCODE_H3K27ac_overlapping_peaks_noIgG, CT_peaks_in_ENCODE_H3K27ac_noIgG)
ENCODE_H3K27ac_overlapping_peaks_IgG = c(ENCODE_H3K27ac_overlapping_peaks_IgG, CT_peaks_in_ENCODE_H3K27ac_IgG)
}
Total_CT_peaks_falling_into_ENCODE = data.frame(Antibody = sampleList, CT_peaks_in_ENCODE_H3K27ac_noIgG = ENCODE_H3K27ac_overlapping_peaks_noIgG, CT_peaks_in_ENCODE_H3K27ac_IgG = ENCODE_H3K27ac_overlapping_peaks_IgG)
Total_CT_peaks_falling_into_ENCODEcaptured_H3K27ac_ENCODE_list_noIgG = c()
captured_H3K27ac_ENCODE_list_IgG = c()
for(i in 1:length(peak_list_IgG)){
overlaps_H3K27ac_noIgG = GenomicRanges::countOverlaps(query = ENCODE_H3K27ac, subject = peak_list_noIgG[[i]])
overlaps_H3K27ac_IgG = GenomicRanges::countOverlaps(query = ENCODE_H3K27ac, subject = peak_list_IgG[[i]])
ENCODE_H3K27ac_captured_peaks_noIgG = overlaps_H3K27ac_noIgG[overlaps_H3K27ac_noIgG != 0] %>% length()
ENCODE_H3K27ac_captured_peaks_IgG = overlaps_H3K27ac_IgG[overlaps_H3K27ac_IgG != 0] %>% length()
captured_H3K27ac_ENCODE_list_noIgG = c(captured_H3K27ac_ENCODE_list_noIgG, ENCODE_H3K27ac_captured_peaks_noIgG)
captured_H3K27ac_ENCODE_list_IgG = c(captured_H3K27ac_ENCODE_list_IgG, ENCODE_H3K27ac_captured_peaks_IgG)
}
Total_captured_ENCODE_peaks = data.frame(Antibody = sampleList, ENCODE_H3K27ac_captured_peaks_noIgG = captured_H3K27ac_ENCODE_list_noIgG, ENCODE_H3K27ac_captured_peaks_IgG = captured_H3K27ac_ENCODE_list_IgG)
Total_captured_ENCODE_peaksENCODE_overlaps_H3K27ac = Total_CT_peaks %>% left_join(Total_captured_ENCODE_peaks, by = "Antibody") %>% left_join(Total_CT_peaks_falling_into_ENCODE, by = "Antibody")
ENCODE_overlaps_H3K27ac$Percentage_of_total_CT_IgG = ENCODE_overlaps_H3K27ac$CT_peaks_in_ENCODE_H3K27ac_IgG / ENCODE_overlaps_H3K27ac$Total_peaks_with_IgG * 100
ENCODE_overlaps_H3K27ac$Percentage_of_total_CT_noIgG = ENCODE_overlaps_H3K27ac$CT_peaks_in_ENCODE_H3K27ac_noIgG / ENCODE_overlaps_H3K27ac$Total_peaks_without_IgG * 100
ENCODE_overlaps_H3K27ac$Percentage_of_total_ENCODE_IgG = ENCODE_overlaps_H3K27ac$ENCODE_H3K27ac_captured_peaks_IgG / length(ENCODE_H3K27ac) * 100
ENCODE_overlaps_H3K27ac$Percentage_of_total_ENCODE_noIgG = ENCODE_overlaps_H3K27ac$ENCODE_H3K27ac_captured_peaks_noIgG / length(ENCODE_H3K27ac) * 100
ENCODE_overlaps_H3K27ac = ENCODE_overlaps_H3K27ac[c("Antibody", "Total_peaks_with_IgG", "Total_peaks_without_IgG", "CT_peaks_in_ENCODE_H3K27ac_IgG", "Percentage_of_total_CT_IgG", "CT_peaks_in_ENCODE_H3K27ac_noIgG", "Percentage_of_total_CT_noIgG", "ENCODE_H3K27ac_captured_peaks_IgG", "Percentage_of_total_ENCODE_IgG", "ENCODE_H3K27ac_captured_peaks_noIgG", "Percentage_of_total_ENCODE_noIgG")]
ENCODE_overlaps_H3K27acENCODE_overlaps_H3K27ac_perc = ENCODE_overlaps_H3K27ac[c("Antibody", "Percentage_of_total_ENCODE_noIgG", "Percentage_of_total_ENCODE_IgG")]
colnames(ENCODE_overlaps_H3K27ac_perc) = c("Antibody", "no IgG", "IgG")
ENCODE_overlaps_H3K27ac_melt = melt(ENCODE_overlaps_H3K27ac_perc, id.vars = c("Antibody"))
ENCODE_overlap_plot = ggplot(ENCODE_overlaps_H3K27ac_melt, aes(Antibody, value)) +
geom_bar(aes(fill = variable),
width = 0.7, position = position_dodge(width = 0.75), stat = "identity") +
ylim(0, 100) +
ylab("% of ENCODE peaks captured") +
xlab("Antibody") +
ggtitle("ENCODE peaks captured by C&T (H3K27ac)") +
theme_bw()
ENCODE_overlap_plotCT_in_ENCODE_H3K27ac_perc = ENCODE_overlaps_H3K27ac[c("Antibody", "Percentage_of_total_CT_noIgG", "Percentage_of_total_CT_IgG")]
colnames(CT_in_ENCODE_H3K27ac_perc) = c("Antibody", "no IgG", "IgG")
CT_in_ENCODE_H3K27ac_melt = melt(CT_in_ENCODE_H3K27ac_perc, id.vars = c("Antibody"))
ENCODE_overlap_plot = ggplot(CT_in_ENCODE_H3K27ac_melt, aes(Antibody, value)) +
geom_bar(aes(fill = variable),
width = 0.7, position = position_dodge(width = 0.75), stat = "identity") +
ylim(0, 100) +
ylab("% of sample peaks in ENCODE") +
xlab("Antibody") +
ggtitle("C&T peaks falling into ENCODE (H3K27ac)") +
theme_bw()
ENCODE_overlap_plotENCODE_H3K27me3_overlapping_peaks_noIgG = c()
ENCODE_H3K27me3_overlapping_peaks_IgG = c()
for(i in 1:length(peak_list_IgG)){
overlaps_H3K27me3_noIgG = GenomicRanges::countOverlaps(query = peak_list_noIgG[[i]], subject = ENCODE_H3K27me3)
overlaps_H3K27me3_IgG = GenomicRanges::countOverlaps(query = peak_list_IgG[[i]], subject = ENCODE_H3K27me3)
CT_peaks_in_ENCODE_H3K27me3_noIgG = overlaps_H3K27me3_noIgG[overlaps_H3K27me3_noIgG != 0] %>% length()
CT_peaks_in_ENCODE_H3K27me3_IgG = overlaps_H3K27me3_IgG[overlaps_H3K27me3_IgG != 0] %>% length()
ENCODE_H3K27me3_overlapping_peaks_noIgG = c(ENCODE_H3K27me3_overlapping_peaks_noIgG, CT_peaks_in_ENCODE_H3K27me3_noIgG)
ENCODE_H3K27me3_overlapping_peaks_IgG = c(ENCODE_H3K27me3_overlapping_peaks_IgG, CT_peaks_in_ENCODE_H3K27me3_IgG)
}
Total_CT_peaks_falling_into_ENCODE = data.frame(Antibody = sampleList, CT_peaks_in_ENCODE_H3K27me3_noIgG = ENCODE_H3K27me3_overlapping_peaks_noIgG, CT_peaks_in_ENCODE_H3K27me3_IgG = ENCODE_H3K27me3_overlapping_peaks_IgG)
Total_CT_peaks_falling_into_ENCODEcaptured_H3K27me3_ENCODE_list_noIgG = c()
captured_H3K27me3_ENCODE_list_IgG = c()
for(i in 1:length(peak_list_IgG)){
overlaps_H3K27me3_noIgG = GenomicRanges::countOverlaps(query = ENCODE_H3K27me3, subject = peak_list_noIgG[[i]])
overlaps_H3K27me3_IgG = GenomicRanges::countOverlaps(query = ENCODE_H3K27me3, subject = peak_list_IgG[[i]])
ENCODE_H3K27me3_captured_peaks_noIgG = overlaps_H3K27me3_noIgG[overlaps_H3K27me3_noIgG != 0] %>% length()
ENCODE_H3K27me3_captured_peaks_IgG = overlaps_H3K27me3_IgG[overlaps_H3K27me3_IgG != 0] %>% length()
captured_H3K27me3_ENCODE_list_noIgG = c(captured_H3K27me3_ENCODE_list_noIgG, ENCODE_H3K27me3_captured_peaks_noIgG)
captured_H3K27me3_ENCODE_list_IgG = c(captured_H3K27me3_ENCODE_list_IgG, ENCODE_H3K27me3_captured_peaks_IgG)
}
Total_captured_ENCODE_peaks = data.frame(Antibody = sampleList, ENCODE_H3K27me3_captured_peaks_noIgG = captured_H3K27me3_ENCODE_list_noIgG, ENCODE_H3K27me3_captured_peaks_IgG = captured_H3K27me3_ENCODE_list_IgG)
Total_captured_ENCODE_peaksENCODE_overlaps_H3K27me3 = Total_CT_peaks %>% left_join(Total_captured_ENCODE_peaks, by = "Antibody") %>% left_join(Total_CT_peaks_falling_into_ENCODE, by = "Antibody")
ENCODE_overlaps_H3K27me3$Percentage_of_total_CT_IgG = ENCODE_overlaps_H3K27me3$CT_peaks_in_ENCODE_H3K27me3_IgG / ENCODE_overlaps_H3K27me3$Total_peaks_with_IgG * 100
ENCODE_overlaps_H3K27me3$Percentage_of_total_CT_noIgG = ENCODE_overlaps_H3K27me3$CT_peaks_in_ENCODE_H3K27me3_noIgG / ENCODE_overlaps_H3K27me3$Total_peaks_without_IgG * 100
ENCODE_overlaps_H3K27me3$Percentage_of_total_ENCODE_IgG = ENCODE_overlaps_H3K27me3$ENCODE_H3K27me3_captured_peaks_IgG / length(ENCODE_H3K27me3) * 100
ENCODE_overlaps_H3K27me3$Percentage_of_total_ENCODE_noIgG = ENCODE_overlaps_H3K27me3$ENCODE_H3K27me3_captured_peaks_noIgG / length(ENCODE_H3K27me3) * 100
ENCODE_overlaps_H3K27me3 = ENCODE_overlaps_H3K27me3[c("Antibody", "Total_peaks_with_IgG", "Total_peaks_without_IgG", "CT_peaks_in_ENCODE_H3K27me3_IgG", "Percentage_of_total_CT_IgG", "CT_peaks_in_ENCODE_H3K27me3_noIgG", "Percentage_of_total_CT_noIgG", "ENCODE_H3K27me3_captured_peaks_IgG", "Percentage_of_total_ENCODE_IgG", "ENCODE_H3K27me3_captured_peaks_noIgG", "Percentage_of_total_ENCODE_noIgG")]
ENCODE_overlaps_H3K27me3ENCODE_overlaps_H3K27me3_perc = ENCODE_overlaps_H3K27me3[c("Antibody", "Percentage_of_total_ENCODE_noIgG", "Percentage_of_total_ENCODE_IgG")]
colnames(ENCODE_overlaps_H3K27me3_perc) = c("Antibody", "no IgG", "IgG")
ENCODE_overlaps_H3K27me3_melt = melt(ENCODE_overlaps_H3K27me3_perc, id.vars = c("Antibody"))
ENCODE_overlap_plot = ggplot(ENCODE_overlaps_H3K27me3_melt, aes(Antibody, value)) +
geom_bar(aes(fill = variable),
width = 0.7, position = position_dodge(width = 0.75), stat = "identity") +
ylim(0, 100) +
ylab("% of ENCODE peaks captured") +
xlab("Antibody") +
ggtitle("ENCODE peaks captured by C&T (H3K27me3)") +
theme_bw()
ENCODE_overlap_plotCT_in_ENCODE_H3K27me3_perc = ENCODE_overlaps_H3K27me3[c("Antibody", "Percentage_of_total_CT_noIgG", "Percentage_of_total_CT_IgG")]
colnames(CT_in_ENCODE_H3K27me3_perc) = c("Antibody", "no IgG", "IgG")
CT_in_ENCODE_H3K27me3_melt = melt(CT_in_ENCODE_H3K27me3_perc, id.vars = c("Antibody"))
ENCODE_overlap_plot = ggplot(CT_in_ENCODE_H3K27me3_melt, aes(Antibody, value)) +
geom_bar(aes(fill = variable),
width = 0.7, position = position_dodge(width = 0.75), stat = "identity") +
ylim(0, 100) +
ylab("% of sample peaks in ENCODE") +
xlab("Antibody") +
ggtitle("C&T peaks falling into ENCODE (H3K27me3)") +
theme_bw()
ENCODE_overlap_plotchrHMM.url = "http://hgdownload.cse.ucsc.edu/goldenPath/hg19/encodeDCC/wgEncodeBroadHmm/wgEncodeBroadHmmK562HMM.bed.gz"
chrHMM = genomation::readBed(chrHMM.url)
chrHMM.list = GenomicRanges::split(chrHMM, chrHMM$name, drop = TRUE)